#!/bin/bash
#
# Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2015. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

#
# Convenience script to run MPI applications with UCX
#
# Usage: run_mpi.sh <options> <executable> <arguments>
#

#
# Print the arguments on stdout as a single space-separated line, but only
# when verbose mode is on (VERBOSE is a non-zero integer). An unset or empty
# VERBOSE counts as off. Always returns 0 so that a silent call is not
# mistaken for a failure.
#
verbose()
{
	if [ "${VERBOSE:-0}" -ne 0 ]
	then
		printf '%s\n' "$*"
	fi
}

#
# Pick up the job geometry from SLURM when a job is available.
#
# If SLURM_JOBID is not set, the first job that squeue lists for $USER is
# used; when that yields nothing the function returns without changing
# anything. Otherwise it:
#   - exports HOSTS, the output of "hostlist -e" on the job's node list
#   - sets SLURM_NNODES, and NNODES when the reported count is a positive
#     integer
#   - exports PPN, taken from SLURM_JOB_CPUS_PER_NODE when that is set, or
#     computed as (total CPUs / node count) from squeue otherwise
# NNODES and PPN keep their previous values when squeue gives no usable
# numbers.
#
check_slurm_env()
{
	local nodelist

	if [ -z "$SLURM_JOBID" ]
	then
		# Search for jobs of the current user
		SLURM_JOBID=$(squeue -h -u "$USER" -o "%i" | head -n 1)
	fi

	if [ -z "$SLURM_JOBID" ]
	then
		# Skip slurm
		return
	fi

	# Nodes to run on. The node list is passed quoted: a compressed list
	# such as "node[1-4]" is also a valid glob pattern and must not be
	# expanded against files in the current directory.
	nodelist=$(squeue -j "$SLURM_JOBID" -h -o "%N")
	HOSTS=$(hostlist -e "$nodelist")
	export HOSTS

	SLURM_NNODES=$(squeue -j "$SLURM_JOBID" -h -o "%D")
	if [[ $SLURM_NNODES =~ ^[1-9][0-9]*$ ]]
	then
		NNODES=$SLURM_NNODES
	fi

	if [ -n "$SLURM_JOB_CPUS_PER_NODE" ]
	then
		# The value is a comma-separated list of counts with optional
		# "(xN)" repeat suffixes, e.g. "16(x2),8"; keep the first count
		PPN=${SLURM_JOB_CPUS_PER_NODE%%[(,]*}
		export PPN
	else
		TOTAL_CPUS=$(squeue -j "$SLURM_JOBID" -h -o "%C")
		# Divide only when both operands are positive integers, so that
		# empty squeue output cannot cause an arithmetic error
		if [[ $TOTAL_CPUS =~ ^[1-9][0-9]*$ && $SLURM_NNODES =~ ^[1-9][0-9]*$ ]]
		then
			export PPN=$((TOTAL_CPUS / SLURM_NNODES))
		fi
	fi
}

#
# Write the command-line help text to standard output. The current values of
# NNODES, PPN and MPI_LOG_LEVEL are shown in parentheses next to the options
# that set them.
#
usage()
{
	printf '%s\n' \
		"Usage: run_mpi.sh <options> <executable> <arguments> -- <additional arguments to launcher>" \
		"" \
		"  -h|--help                     Show this help message" \
		"  -v|--verbose                  Turn on verbosity" \
		"  -c|--config  <name>=<value>   Set UCX configuration" \
		"  -N|--nnodes  <count>          Number of nodes to run on ($NNODES)" \
		"  --ppn  <count>                Number of processes per node ($PPN)" \
		"  --mpi-log-level <level>       Log level for MPI UCX component ($MPI_LOG_LEVEL)" \
		"  --valgrind                    Run with valgrind" \
		"  --valgrind-args \"<args>\"      Extra arguments to valgrind" \
		""
}

#
# Reset every script setting to its default value.
#
# MPIRUN and the LIBUC* paths come from placeholders that are substituted at
# configure time. All scalar settings are exported. EXE_ARGS is reset to an
# empty array and is not exported (Bash cannot export arrays).
#
initialize()
{
	# Paths filled in by configure
	export MPIRUN=@MPIRUN@
	export LIBUCS=@abs_top_builddir@/src/ucs/.libs/libucs.so
	export LIBUCT=@abs_top_builddir@/src/uct/.libs/libuct.so
	export LIBUCP=@abs_top_builddir@/src/ucp/.libs/libucp.so

	# Defaults for the command-line options
	export VERBOSE=0
	export EXE=""
	export EXTRA_MPI_ARGS=""
	export NNODES=1
	export PPN=1
	export CONFIG=""
	export MPI_LOG_LEVEL=0
	export VALGRIND=0
	export VALGRIND_ARGS=""

	# EXE_ARGS is appended to with +=( ... ) and expanded as
	# "${EXE_ARGS[@]}". Assigning "" to it would create the element
	# [0]="" and hand the application an empty first argument, so it is
	# reset as an empty array instead.
	EXE_ARGS=()
}

#
# Abort with the usage text when an option that takes a value is the last
# word on the command line.
#   $1 - option name as typed by the user
#   $2 - number of command-line words left, including the option itself
#
parse_args_require_value()
{
	if [ "$2" -lt 2 ]
	then
		echo "Option $1 requires a value" >&2
		usage >&2
		exit 254
	fi
}

#
# Parse the script command line:
#   <options> <executable> <arguments> [-- <additional launcher arguments>]
#
# Options update the exported settings VERBOSE, CONFIG, NNODES, PPN,
# MPI_LOG_LEVEL, VALGRIND and VALGRIND_ARGS. The first word that does not
# start with "-" becomes EXE; the words after it are appended to the
# EXE_ARGS array until "--", and everything after "--" is stored, joined by
# spaces, in EXTRA_MPI_ARGS.
#
# Exits with status 0 after printing the help text for -h|--help, and with
# status 254 (usage on stderr) for an unknown option or a missing option
# value.
#
parse_args()
{
	local key

	# Options, up to and including the executable name
	while [ $# -gt 0 ]
	do
		key="$1"
		case "$key" in
		-h|--help)
			usage
			exit 0
			;;
		-v|--verbose)
			export VERBOSE=1
			;;
		-c|--config)
			parse_args_require_value "$key" $#
			export CONFIG="$CONFIG $2"
			shift
			;;
		-N|--nnodes)
			parse_args_require_value "$key" $#
			export NNODES=$2
			shift
			;;
		--ppn)
			parse_args_require_value "$key" $#
			export PPN=$2
			shift
			;;
		--mpi-log-level)
			parse_args_require_value "$key" $#
			export MPI_LOG_LEVEL=$2
			shift
			;;
		--valgrind)
			export VALGRIND=1
			;;
		--valgrind-args)
			parse_args_require_value "$key" $#
			export VALGRIND_ARGS="$2"
			shift
			;;
		[!-]*)
			# First non-option word is the executable
			export EXE=$key
			shift
			break
			;;
		*)
			usage >&2
			exit 254
			;;
		esac
		shift
	done

	# Application arguments, then launcher arguments after "--"
	while [ $# -gt 0 ]
	do
		key="$1"
		case "$key" in
		--)
			shift
			export EXTRA_MPI_ARGS="$*"
			break
			;;
		*)
			EXE_ARGS+=("$key")
			;;
		esac
		shift
	done
}

#
# Derive the launch geometry from NNODES, PPN and HOSTS.
#
# Exports NP, the product NNODES * PPN, and HOSTLIST, the first NNODES
# entries of the whitespace-separated HOSTS joined with commas.
#
adjust_run_params()
{
	local total_ranks first_hosts

	total_ranks=$((${NNODES} * ${PPN}))

	# $HOSTS is left unquoted on purpose: word splitting turns any run of
	# blanks or newlines into the single spaces that cut splits on
	first_hosts=$(echo $HOSTS | cut -d' ' -f 1-$NNODES | tr ' ' ',')

	export NP=$total_ranks
	export HOSTLIST=$first_hosts
}

#
# Build the Open MPI command line and run it.
#
# Reads MPIRUN, MPI_LOG_LEVEL, HOSTLIST, NP, LD_PRELOAD, LIBUCP, VALGRIND,
# VALGRIND_ARGS, CONFIG, EXTRA_MPI_ARGS, EXE and the EXE_ARGS array.
# Selects the UCX pml/spml components, preloads libucp into the ranks and
# forwards every name=value word of CONFIG with -x. With VALGRIND set, the
# executable is wrapped in valgrind and the valgrind library directory is
# appended to LD_LIBRARY_PATH. LD_LIBRARY_PATH is exported and forwarded.
# The return status is that of the launcher.
#
run_open_mpi()
{
	local lib c

	OMPI_ARGS=""
	OMPI_ARGS="$OMPI_ARGS -mca pml ucx"
	OMPI_ARGS="$OMPI_ARGS -mca pml_ucx_verbose $MPI_LOG_LEVEL"
	OMPI_ARGS="$OMPI_ARGS -mca spml ucx"
	OMPI_ARGS="$OMPI_ARGS -mca spml_ucx_verbose $MPI_LOG_LEVEL"
	# Without a host list (no SLURM job found) -H must be left out, or
	# it would consume the following "-n" as its argument
	if [ -n "$HOSTLIST" ]
	then
		OMPI_ARGS="$OMPI_ARGS -H $HOSTLIST"
	fi
	OMPI_ARGS="$OMPI_ARGS -n $NP"
	OMPI_ARGS="$OMPI_ARGS --map-by node"
	OMPI_ARGS="$OMPI_ARGS -mca ess_base_stream_buffering 0"
	OMPI_ARGS="$OMPI_ARGS -mca mpi_abort_delay -1"

	# Keep an existing LD_PRELOAD in front, without a stray leading ":"
	# when there is none
	OMPI_ARGS="$OMPI_ARGS -x LD_PRELOAD=${LD_PRELOAD:+$LD_PRELOAD:}$LIBUCP"
	if [ "$VALGRIND" -ne 0 ]
	then
		# Preload valgrind-enabled libraries. Each one is appended
		# directly to the LD_PRELOAD value that currently ends OMPI_ARGS.
		for lib in /usr/lib64/mlnx_ofed/valgrind/*.so
		do
			# Also skips the literal pattern left by an unmatched glob
			[ -f "$lib" ] && OMPI_ARGS="$OMPI_ARGS:$lib"
		done
	fi

	OMPI_ARGS="$OMPI_ARGS -x UCX_HANDLE_ERRORS=freeze"
	# CONFIG is a space-separated list of name=value words; split it
	for c in $CONFIG
	do
		OMPI_ARGS="$OMPI_ARGS -x $c"
	done

	if [ "$VALGRIND" -ne 0 ]
	then
		MPI_HOME=$(cd "$(dirname "${MPIRUN}")/.." && pwd)
		EXE="valgrind \
			--fair-sched=try \
			--track-origins=yes \
			--leak-check=yes \
			--suppressions=${MPI_HOME}/share/openmpi/openmpi-valgrind.supp \
			--suppressions=@abs_srcdir@/ompi.supp \
			$VALGRIND_ARGS \
			$EXE"
		# An empty leading entry would make the dynamic loader search
		# the current directory, so add the ":" only when needed
		LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}@VALGRIND_LIBPATH@"
	fi

	OMPI_ARGS="$OMPI_ARGS -x LD_LIBRARY_PATH"

	export LD_LIBRARY_PATH
	# OMPI_ARGS, EXTRA_MPI_ARGS and EXE are strings of several words and
	# are split on purpose; EXE_ARGS keeps each argument intact
	verbose $MPIRUN $OMPI_ARGS $EXTRA_MPI_ARGS $EXE "${EXE_ARGS[@]}"
	$MPIRUN $OMPI_ARGS $EXTRA_MPI_ARGS $EXE "${EXE_ARGS[@]}"
}

#
# Script driver: load the defaults, pick up SLURM settings, parse the
# command line given in "$@", derive NP and HOSTLIST, then launch through
# the detected MPI flavor. Only Open MPI is recognized; for anything else a
# message is written to stderr and the script exits with status 253.
#
main()
{
	initialize
	# Reset the argument array after initialize so that it is guaranteed
	# to start with no elements, whatever initialize assigned to the name
	EXE_ARGS=()
	check_slurm_env
	parse_args "$@"
	adjust_run_params

	# Open MPI is recognized by the string "orte" inside the launcher
	# binary together with "Open MPI" in its help output
	if strings "$MPIRUN" | grep -qi orte && "$MPIRUN" -h | grep -q "Open MPI"
	then
		run_open_mpi
	else
		echo "Unrecognized MPI flavor ($MPIRUN)" >&2
		exit 253
	fi
}

# Script entry point: hand the whole command line, word for word, to main
main "$@"
